/**
* Class for a file system based file backend.
+ * Containers are just directories and container sharding is not supported.
+ * Also, for backwards-compatibility, the wiki ID prefix is not used.
+ * Users of this class should set wiki-specific container paths as needed.
+ *
* Status messages should avoid mentioning the internal FS paths.
* Likewise, error suppression should be used to avoid path disclosure.
*
* containerPaths : Map of container names to absolute file system paths
* fileMode : Octal UNIX file permissions to use on files stored
*/
- function __construct( array $config ) {
+ public function __construct( array $config ) {
parent::__construct( $config );
$this->containerPaths = (array)$config['containerPaths'];
foreach ( $this->containerPaths as &$path ) {
protected function doStoreInternal( array $params ) {
$status = Status::newGood();
- list( $c, $dest ) = $this->resolveStoragePath( $params['dst'] );
+ list( $c, $dest ) = $this->resolveStoragePathReal( $params['dst'] );
if ( $dest === null ) {
$status->fatal( 'backend-fail-invalidpath', $params['dst'] );
return $status;
protected function doCopyInternal( array $params ) {
$status = Status::newGood();
- list( $c, $source ) = $this->resolveStoragePath( $params['src'] );
+ list( $c, $source ) = $this->resolveStoragePathReal( $params['src'] );
if ( $source === null ) {
$status->fatal( 'backend-fail-invalidpath', $params['src'] );
return $status;
}
- list( $c, $dest ) = $this->resolveStoragePath( $params['dst'] );
+ list( $c, $dest ) = $this->resolveStoragePathReal( $params['dst'] );
if ( $dest === null ) {
$status->fatal( 'backend-fail-invalidpath', $params['dst'] );
return $status;
protected function doMoveInternal( array $params ) {
$status = Status::newGood();
- list( $c, $source ) = $this->resolveStoragePath( $params['src'] );
+ list( $c, $source ) = $this->resolveStoragePathReal( $params['src'] );
if ( $source === null ) {
$status->fatal( 'backend-fail-invalidpath', $params['src'] );
return $status;
}
- list( $c, $dest ) = $this->resolveStoragePath( $params['dst'] );
+ list( $c, $dest ) = $this->resolveStoragePathReal( $params['dst'] );
if ( $dest === null ) {
$status->fatal( 'backend-fail-invalidpath', $params['dst'] );
return $status;
protected function doDeleteInternal( array $params ) {
$status = Status::newGood();
- list( $c, $source ) = $this->resolveStoragePath( $params['src'] );
+ list( $c, $source ) = $this->resolveStoragePathReal( $params['src'] );
if ( $source === null ) {
$status->fatal( 'backend-fail-invalidpath', $params['src'] );
return $status;
protected function doCreateInternal( array $params ) {
$status = Status::newGood();
- list( $c, $dest ) = $this->resolveStoragePath( $params['dst'] );
+ list( $c, $dest ) = $this->resolveStoragePathReal( $params['dst'] );
if ( $dest === null ) {
$status->fatal( 'backend-fail-invalidpath', $params['dst'] );
return $status;
}
/**
- * @see FileBackend::prepare()
+ * @see FileBackend::doPrepare()
*/
- function prepare( array $params ) {
+ protected function doPrepare( $container, $dir, array $params ) {
$status = Status::newGood();
- list( $c, $dir ) = $this->resolveStoragePath( $params['dir'] );
- if ( $dir === null ) {
- $status->fatal( 'backend-fail-invalidpath', $params['dir'] );
- return $status; // invalid storage path
- }
if ( !wfMkdirParents( $dir ) ) {
$status->fatal( 'directorycreateerror', $params['dir'] );
- return $status;
} elseif ( !is_writable( $dir ) ) {
$status->fatal( 'directoryreadonlyerror', $params['dir'] );
- return $status;
} elseif ( !is_readable( $dir ) ) {
$status->fatal( 'directorynotreadableerror', $params['dir'] );
- return $status;
}
return $status;
}
/**
- * @see FileBackend::secure()
+ * @see FileBackend::doSecure()
*/
- function secure( array $params ) {
+ protected function doSecure( $container, $dir, array $params ) {
$status = Status::newGood();
- list( $c, $dir ) = $this->resolveStoragePath( $params['dir'] );
- if ( $dir === null ) {
- $status->fatal( 'backend-fail-invalidpath', $params['dir'] );
- return $status; // invalid storage path
- }
if ( !wfMkdirParents( $dir ) ) {
$status->fatal( 'directorycreateerror', $params['dir'] );
return $status;
}
- // Add a .htaccess file to the root of the deleted zone
- if ( !empty( $params['noAccess'] ) && !file_exists( "{$dir}/.htaccess" ) ) {
+ // Seed new directories with a blank index.html, to prevent crawling...
+ if ( !empty( $params['noListing'] ) && !file_exists( "{$dir}/index.html" ) ) {
wfSuppressWarnings();
- $ok = file_put_contents( "{$dir}/.htaccess", "Deny from all\n" );
+ $ok = file_put_contents( "{$dir}/index.html", '' );
wfRestoreWarnings();
if ( !$ok ) {
- $status->fatal( 'backend-fail-create', $params['dir'] . '/.htaccess' );
+ $status->fatal( 'backend-fail-create', $params['dir'] . '/index.html' );
return $status;
}
}
- // Seed new directories with a blank index.html, to prevent crawling
- if ( !empty( $params['noListing'] ) && !file_exists( "{$dir}/index.html" ) ) {
+ // Add a .htaccess file to the root of the container...
+ list( $b, $container, $r ) = FileBackend::splitStoragePath( $params['dir'] );
+ $dirRoot = $this->containerPaths[$container]; // real path
+ if ( !empty( $params['noAccess'] ) && !file_exists( "{$dirRoot}/.htaccess" ) ) {
wfSuppressWarnings();
- $ok = file_put_contents( "{$dir}/index.html", '' );
+ $ok = file_put_contents( "{$dirRoot}/.htaccess", "Deny from all\n" );
wfRestoreWarnings();
if ( !$ok ) {
- $status->fatal( 'backend-fail-create', $params['dir'] . '/index.html' );
+ $storeDir = "mwstore://{$this->name}/{$container}";
+ $status->fatal( 'backend-fail-create', "$storeDir/.htaccess" );
return $status;
}
}
}
/**
- * @see FileBackend::clean()
+ * @see FileBackend::doClean()
*/
- function clean( array $params ) {
+ protected function doClean( $container, $dir, array $params ) {
$status = Status::newGood();
- list( $c, $dir ) = $this->resolveStoragePath( $params['dir'] );
- if ( $dir === null ) {
- $status->fatal( 'backend-fail-invalidpath', $params['dir'] );
- return $status; // invalid storage path
- }
wfSuppressWarnings();
if ( is_dir( $dir ) ) {
rmdir( $dir ); // remove directory if empty
/**
* @see FileBackend::fileExists()
*/
- function fileExists( array $params ) {
- list( $c, $source ) = $this->resolveStoragePath( $params['src'] );
+ public function fileExists( array $params ) {
+ list( $c, $source ) = $this->resolveStoragePathReal( $params['src'] );
if ( $source === null ) {
return false; // invalid storage path
}
/**
* @see FileBackend::getFileTimestamp()
*/
- function getFileTimestamp( array $params ) {
- list( $c, $source ) = $this->resolveStoragePath( $params['src'] );
+ public function getFileTimestamp( array $params ) {
+ list( $c, $source ) = $this->resolveStoragePathReal( $params['src'] );
if ( $source === null ) {
return false; // invalid storage path
}
}
/**
- * @see FileBackend::getFileList()
+ * @see FileBackend::getFileListInternal()
*/
- function getFileList( array $params ) {
- list( $c, $dir ) = $this->resolveStoragePath( $params['dir'] );
- if ( $dir === null ) { // invalid storage path
- return null;
- }
+ public function getFileListInternal( $container, $dir, array $params ) {
wfSuppressWarnings();
$exists = is_dir( $dir );
wfRestoreWarnings();
/**
* @see FileBackend::getLocalReference()
*/
- function getLocalReference( array $params ) {
- list( $c, $source ) = $this->resolveStoragePath( $params['src'] );
+ public function getLocalReference( array $params ) {
+ list( $c, $source ) = $this->resolveStoragePathReal( $params['src'] );
if ( $source === null ) {
return null;
}
/**
* @see FileBackend::getLocalCopy()
*/
- function getLocalCopy( array $params ) {
- list( $c, $source ) = $this->resolveStoragePath( $params['src'] );
+ public function getLocalCopy( array $params ) {
+ list( $c, $source ) = $this->resolveStoragePathReal( $params['src'] );
if ( $source === null ) {
return null;
}
- // Get source file extension
- $i = strrpos( $source, '.' );
- $ext = strtolower( $i ? substr( $source, $i + 1 ) : '' );
- // Create a new temporary file...
+ // Create a new temporary file with the same extension...
+ $ext = FileBackend::extensionFromPath( $params['src'] );
$tmpFile = TempFSFile::factory( wfBaseName( $source ) . '_', $ext );
if ( !$tmpFile ) {
return null;
/** @var Array */
protected $cache = array(); // (storage path => key => value)
protected $maxCacheSize = 50; // integer; max paths with entries
+ /** @var Array */
+ protected $shardViaHashLevels = array(); // (container name => integer)
/**
* Create a file in the backend with the given contents.
* Do not call this function from places outside FileBackend and FileOp.
* $params include:
* srcs : ordered source storage paths (e.g. chunk1, chunk2, ...)
- * dst : destination storage path
+ * dst : file system path to 0-byte temp file
* overwriteDest : overwrite any file that exists at the destination
*
* @param $params Array
*/
final public function concatenateInternal( array $params ) {
$status = $this->doConcatenateInternal( $params );
- $this->clearCache( array( $params['dst'] ) );
return $status;
}
/**
* @see FileBackendBase::prepare()
*/
- public function prepare( array $params ) {
+ final public function prepare( array $params ) {
+ $status = Status::newGood();
+ list( $fullCont, $dir, $shard ) = $this->resolveStoragePath( $params['dir'] );
+ if ( $dir === null ) {
+ $status->fatal( 'backend-fail-invalidpath', $params['dir'] );
+ return $status; // invalid storage path
+ }
+ if ( $shard !== null ) { // confined to a single container/shard
+ $status->merge( $this->doPrepare( $fullCont, $dir, $params ) );
+ } else { // directory is on several shards
+ wfDebug( __METHOD__ . ": iterating over all container shards.\n" );
+ list( $b, $shortCont, $r ) = self::splitStoragePath( $params['dir'] );
+ foreach ( $this->getContainerSuffixes( $shortCont ) as $suffix ) {
+ $status->merge( $this->doPrepare( "{$fullCont}{$suffix}", $dir, $params ) );
+ }
+ }
+ return $status;
+ }
+
+ /**
+ * @see FileBackend::prepare()
+ */
+ protected function doPrepare( $container, $dir, array $params ) {
return Status::newGood();
}
/**
* @see FileBackendBase::secure()
*/
- public function secure( array $params ) {
+ final public function secure( array $params ) {
+ $status = Status::newGood();
+ list( $fullCont, $dir, $shard ) = $this->resolveStoragePath( $params['dir'] );
+ if ( $dir === null ) {
+ $status->fatal( 'backend-fail-invalidpath', $params['dir'] );
+ return $status; // invalid storage path
+ }
+ if ( $shard !== null ) { // confined to a single container/shard
+ $status->merge( $this->doSecure( $fullCont, $dir, $params ) );
+ } else { // directory is on several shards
+ wfDebug( __METHOD__ . ": iterating over all container shards.\n" );
+ list( $b, $shortCont, $r ) = self::splitStoragePath( $params['dir'] );
+ foreach ( $this->getContainerSuffixes( $shortCont ) as $suffix ) {
+ $status->merge( $this->doSecure( "{$fullCont}{$suffix}", $dir, $params ) );
+ }
+ }
+ return $status;
+ }
+
+ /**
+ * @see FileBackend::secure()
+ */
+ protected function doSecure( $container, $dir, array $params ) {
return Status::newGood();
}
/**
* @see FileBackendBase::clean()
*/
- public function clean( array $params ) {
+ final public function clean( array $params ) {
+ $status = Status::newGood();
+ list( $fullCont, $dir, $shard ) = $this->resolveStoragePath( $params['dir'] );
+ if ( $dir === null ) {
+ $status->fatal( 'backend-fail-invalidpath', $params['dir'] );
+ return $status; // invalid storage path
+ }
+ if ( $shard !== null ) { // confined to a single container/shard
+ $status->merge( $this->doClean( $fullCont, $dir, $params ) );
+ } else { // directory is on several shards
+ wfDebug( __METHOD__ . ": iterating over all container shards.\n" );
+ list( $b, $shortCont, $r ) = self::splitStoragePath( $params['dir'] );
+ foreach ( $this->getContainerSuffixes( $shortCont ) as $suffix ) {
+ $status->merge( $this->doClean( "{$fullCont}{$suffix}", $dir, $params ) );
+ }
+ }
+ return $status;
+ }
+
+ /**
+ * @see FileBackend::clean()
+ */
+ protected function doClean( $container, $dir, array $params ) {
return Status::newGood();
}
return $status;
}
+ /**
+ * @see FileBackendBase::getFileList()
+ */
+ final public function getFileList( array $params ) {
+ list( $fullCont, $dir, $shard ) = $this->resolveStoragePath( $params['dir'] );
+ if ( $dir === null ) { // invalid storage path
+ return null;
+ }
+ if ( $shard !== null ) {
+ // File listing is confined to a single container/shard
+ return $this->getFileListInternal( $fullCont, $dir, $params );
+ } else {
+ wfDebug( __METHOD__ . ": iterating over all container shards.\n" );
+ // File listing spans multiple containers/shards
+ list( $b, $shortCont, $r ) = self::splitStoragePath( $params['dir'] );
+ return new ContainerShardListIterator( $this,
+ $fullCont, $this->getContainerSuffixes( $shortCont ), $params );
+ }
+ }
+
+ /**
+ * Do not call this function from places outside FileBackend and ContainerFileListIterator
+ *
+ * @param $container string Resolved container name
+ * @param $dir string Resolved path relative to container
+ * @param $params Array
+ * @see FileBackend::getFileList()
+ */
+ abstract public function getFileListInternal( $container, $dir, array $params );
+
/**
* Get the list of supported operations and their corresponding FileOp classes.
*
// This accounts for Swift and S3 restrictions. Also note
// that these urlencode to the same string, which is useful
// since the Swift size limit is *after* URL encoding.
- return preg_match( '/^[a-zA-Z0-9._-]{1,256}$/u', $container );
+ // Limit to 200 to leave room for '.shard-XX' or '.segment'.
+ return preg_match( '/^[a-zA-Z0-9._-]{1,200}$/u', $container );
}
/**
}
/**
- * Split a storage path (e.g. "mwstore://backend/container/path/to/object")
- * into an internal container name and an internal relative object name.
- * This also checks that the storage path is valid and is within this backend.
+ * Splits a storage path into an internal container name,
+ * an internal relative object name, and a container shard suffix.
+ * Any shard suffix is already appended to the internal container name.
+ * This also checks that the storage path is valid and within this backend.
+ *
+ * If the container is sharded but a suffix could not be determined,
+ * this means that the path can only refer to a directory and can only
+ * be scanned by looking in all the container shards.
*
* @param $storagePath string
- * @return Array (container, object name) or (null, null) if path is invalid
+ * @return Array (container, path, container suffix) or (null, null, null) if invalid
*/
final protected function resolveStoragePath( $storagePath ) {
list( $backend, $container, $relPath ) = self::splitStoragePath( $storagePath );
if ( $backend === $this->name ) { // must be for this backend
$relPath = self::normalizeStoragePath( $relPath );
if ( $relPath !== null ) {
+ // Get shard for the normalized path if this container is sharded
+ $cShard = $this->getContainerShard( $container, $relPath );
+ // Validate and sanitize the relative path (backend-specific)
$relPath = $this->resolveContainerPath( $container, $relPath );
if ( $relPath !== null ) {
+ // Prepend any wiki ID prefix to the container name
$container = $this->fullContainerName( $container );
if ( self::isValidContainerName( $container ) ) {
- $container = $this->resolveContainerName( $container );
+ // Validate and sanitize the container name (backend-specific)
+ $container = $this->resolveContainerName( "{$container}{$cShard}" );
if ( $container !== null ) {
- return array( $container, $relPath );
+ return array( $container, $relPath, $cShard );
}
}
}
}
}
+ return array( null, null, null );
+ }
+
+ /**
+ * Like resolveStoragePath() except null values are returned if
+ * the container is sharded and the shard could not be determined.
+ *
+ * @see FileBackend::resolveStoragePath()
+ *
+ * @param $storagePath string
+ * @return Array (container, path) or (null, null) if invalid
+ */
+ final protected function resolveStoragePathReal( $storagePath ) {
+ list( $container, $relPath, $cShard ) = $this->resolveStoragePath( $storagePath );
+ if ( $cShard !== null ) {
+ return array( $container, $relPath );
+ }
return array( null, null );
}
+ /**
+ * Get the container name shard suffix for a given path.
+ * Any empty suffix means the container is not sharded.
+ *
+ * @param $container string Container name
+ * @param $relStoragePath string Storage path relative to the container
+ * @return string|null Returns null if shard could not be determined
+ */
+ final protected function getContainerShard( $container, $relPath ) {
+ $hashLevels = $this->getContainerHashLevels( $container );
+ if ( $hashLevels === 1 ) { // 16 shards per container
+ $hashDirRegex = '(?P<shard>[0-9a-f])';
+ } elseif ( $hashLevels === 2 ) { // 256 shards per container
+ $hashDirRegex = '[0-9a-f]/(?P<shard>[0-9a-f]{2})';
+ } else {
+ return ''; // no sharding
+ }
+ // Allow certain directories to be above the hash dirs so as
+ // to work with FileRepo (e.g. "archive/a/ab" or "temp/a/ab").
+ // They must be 2+ chars to avoid any hash directory ambiguity.
+ if ( preg_match( "!^(?:[^/]{2,}/)*$hashDirRegex(?:/|$)!", $relPath, $m ) ) {
+ return '.shard-' . str_pad( $m['shard'], $hashLevels, '0', STR_PAD_LEFT );
+ }
+ return null; // failed to match
+ }
+
+ /**
+ * Get the number of hash levels for a container.
+ * If greater than 0, then all file storage paths within
+ * the container are required to be hashed accordingly.
+ *
+ * @param $container string
+ * @return integer
+ */
+ final protected function getContainerHashLevels( $container ) {
+ if ( isset( $this->shardViaHashLevels[$container] ) ) {
+ $hashLevels = (int)$this->shardViaHashLevels[$container];
+ if ( $hashLevels >= 0 && $hashLevels <= 2 ) {
+ return $hashLevels;
+ }
+ }
+ return 0; // no sharding
+ }
+
+ /**
+ * Get a list of full container shard suffixes for a container
+ *
+ * @param $container string
+ * @return Array
+ */
+ final protected function getContainerSuffixes( $container ) {
+ $shards = array();
+ $digits = $this->getContainerHashLevels( $container );
+ if ( $digits > 0 ) {
+ $numShards = 1 << ( $digits * 4 );
+ for ( $index = 0; $index < $numShards; $index++ ) {
+ $shards[] = '.shard-' . str_pad( dechex( $index ), $digits, '0', STR_PAD_LEFT );
+ }
+ }
+ return $shards;
+ }
+
/**
* Get the full container name, including the wiki ID prefix
*
* getting absolute paths (e.g. FS based backends). Note that the relative path
* may be the empty string (e.g. the path is simply to the container).
*
- * @param $container string Container the path is relative to
- * @param $relStoragePath string Relative storage path
+ * @param $container string Container name
+ * @param $relStoragePath string Storage path relative to the container
* @return string|null Path or null if not valid
*/
protected function resolveContainerPath( $container, $relStoragePath ) {
return strtolower( $i ? substr( $path, $i + 1 ) : '' );
}
}
+
+/**
+ * FileBackend helper function to handle file listings that span container shards.
+ * Do not use this class from places outside of FileBackend.
+ *
+ * @ingroup FileBackend
+ */
+class ContainerShardListIterator implements Iterator {
+ /* @var FileBackend */
+ protected $backend;
+ /* @var Array */
+ protected $params;
+ /* @var Array */
+ protected $shardSuffixes;
+ protected $container; // string
+ protected $directory; // string
+
+ /* @var Traversable */
+ protected $iter;
+ protected $curShard = 0; // integer
+ protected $pos = 0; // integer
+
+ /**
+ * @param $backend FileBackend
+ * @param $container string Full storage container name
+ * @param $dir string Storage directory relative to container
+ * @param $suffixes Array List of container shard suffixes
+ * @param $params Array
+ */
+ public function __construct(
+ FileBackend $backend, $container, $dir, array $suffixes, array $params
+ ) {
+ $this->backend = $backend;
+ $this->container = $container;
+ $this->directory = $dir;
+ $this->shardSuffixes = $suffixes;
+ $this->params = $params;
+ }
+
+ public function current() {
+ if ( is_array( $this->iter ) ) {
+ return current( $this->iter );
+ } else {
+ return $this->iter->current();
+ }
+ }
+
+ public function key() {
+ return $this->pos;
+ }
+
+ public function next() {
+ ++$this->pos;
+ if ( is_array( $this->iter ) ) {
+ next( $this->iter );
+ } else {
+ $this->iter->next();
+ }
+ // Find the next non-empty shard if no elements are left
+ $this->nextShardIteratorIfNotValid();
+ }
+
+ /**
+ * If the iterator for this container shard is out of items,
+ * then move on to the next container that has items.
+ */
+ protected function nextShardIteratorIfNotValid() {
+ while ( !$this->valid() ) {
+ if ( ++$this->curShard >= count( $this->shardSuffixes ) ) {
+ break; // no more container shards
+ }
+ $this->setIteratorFromCurrentShard();
+ }
+ }
+
+ protected function setIteratorFromCurrentShard() {
+ $suffix = $this->shardSuffixes[$this->curShard];
+ $this->iter = $this->backend->getFileListInternal(
+ "{$this->container}{$suffix}", $this->directory, $this->params );
+ }
+
+ public function rewind() {
+ $this->pos = 0;
+ $this->curShard = 0;
+ $this->setIteratorFromCurrentShard();
+ // Find the next non-empty shard if this one has no elements
+ $this->nextShardIteratorIfNotValid();
+ }
+
+ public function valid() {
+ if ( $this->iter == null ) {
+ return false; // some failure?
+ } elseif ( is_array( $this->iter ) ) {
+ return ( current( $this->iter ) !== false ); // no paths can have this value
+ } else {
+ return $this->iter->valid();
+ }
+ }
+}